home *** CD-ROM | disk | FTP | other *** search
- /*
- Sherlock File Comparison Program.
-
- source: sdif.c
- started: February 11, 1988
- version: see below
-
-
- PUBLIC DOMAIN SOFTWARE
-
- Sherlock, including the SPP, SDEL and SDIF programs, was placed in
- the public domain on June 15, 1991, by its author,
-
- Edward K. Ream
- 166 North Prospect Ave.
- Madison, WI 53705.
- (608) 257-0802
-
- Sherlock may be used for any commercial or non-commercial purpose.
-
-
- DISCLAIMER OF WARRANTIES
-
- Edward K. Ream (Ream) specifically disclaims all warranties,
- expressed or implied, with respect to this computer software,
- including but not limited to implied warranties of merchantability
- and fitness for a particular purpose. In no event shall Ream be
- liable for any loss of profit or any commercial damage, including
- but not limited to special, incidental consequential or other damages.
- */
-
- /*
- Define the compiler to be used (usually from the command line.)
-
- MICRO_SOFT Use MicroSoft v4.00
- TURBOC Use Turbo C v1.0
- */
-
/*
	Miscellaneous global constants and the boolean type used by this file.
*/
#define TRUE		(1)
#define FALSE		(0)
#define BAD_EXIT	1	/* Status passed to exit() on usage and file errors. */

typedef int bool;		/* Holds TRUE or FALSE. */
-
- /*
- Include subsidiary header files.
-
- SL.H MUST be included even if SHERLOCK.C is not linked in.
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include <string.h>
- #include <process.h>
- #include <io.h>
- #include "sl.h"
-
/* Banner printed by main() on every run. */
#define SIGNON "SDIF v1.7: June 15, 1991"

/*
	Usage text, printed by main() as USAGE1 through USAGE5 in order.
	The first line mentions tracepoint arguments only when SHERLOCK is defined.
*/
#ifdef SHERLOCK
#define USAGE1 "usage: SDIF in1(with macros) in2(without) [options] ++/--tracepoint\n\n"
#else
#define USAGE1 "usage: SDIF in1(with macros) in2(without) [options]\n\n"
#endif

#define USAGE2 "-b Report inserted blank lines.\n"
#define USAGE3 "-s Report detailed status of comparison.\n"
#define USAGE4 "-v List all lines of in1 file.\n"
#define USAGE5 "-? Print version number and exit.\n"
-
-
- /*
- There are two windows, one for each file. Each window holds up to
- WINDOW_LINES lines and up to WINDOW_CHARS characters. These windows
- are used to do "look-ahead" comparisons of lines.
-
- Lines are inserted from the back of the window buffers and deleted from
- the front. When the back of the window buffer can not hold the next
- line, the non-deleted lines are moved to the front of the buffer. The
- window buffer is made much larger than required lines so this moving of
- lines in the window buffer doesn't happen often.
-
- We expect an average line size of less than 40, so that WINDOW_LINES
- lines will take less than about 800 characters (20 * 40). Thus, the
- window will have to be repacked only about once in every 150 inserted
- lines. This will not slow down the program noticeably.
- */
- #define WINDOW_LINES 20
- #define WINDOW_CHARS 7000
-
- /* Global flags. */
- bool b_flag = FALSE;
- bool v_flag = FALSE;
- bool s_flag = FALSE;
-
- /* Define the windows. */
- typedef struct {
- FILE * file; /* File handle. */
- bool eof; /* End of file flag. */
- int line; /* Current line number. */
- int nlines; /* # of lines in window. */
- int index[WINDOW_LINES]; /* Indices into window. */
- char window[WINDOW_CHARS]; /* Chars of window. */
- int first; /* Index of first character. */
- int last; /* Index of last character+1. */
- } w_type;
-
- w_type w1, w2;
-
- /* Global file names. */
- char *in1 = NULL;
- char *in2 = NULL;
-
- /*
- Function prototypes.
- */
- void advance (w_type *wp);
- bool fill_buf (char * buffer, w_type *wp);
- void insert (w_type *wp);
- void print_change (int n);
- void print_insert (int n1, int n2);
- void print_match (void);
- bool resynch (int n1, int n2);
- void sdif (void);
-
- /* Main routine. Process command line arguments. */
- int
- main(int argc, char **argv)
- {
- char *arg;
- int i;
-
- /* These two calls MUST come before any others. */
- SL_INIT();
- SL_PARSE(argc, argv, "++", "--");
-
- TRACEPB("main", printf("(%d, %p)\n", argc, argv));
-
- /* Always put out the sign on message. */
- printf("%s\n", SIGNON);
-
- /* Make first test for correct command line. */
- if (argc == 2 && (strcmp(argv[1], "-?")==0)) {
- exit(BAD_EXIT);
- }
- else if (argc < 3) {
- printf("%s%s%s%s%s", USAGE1, USAGE2, USAGE3, USAGE4, USAGE5);
- exit(BAD_EXIT);
- }
-
- /* Process all the arguments on the command line. */
- argc--;
- argv++;
- while (argc-- > 0) {
- arg = *argv++;
-
- if (strcmp(arg, "-b")==0) {
- b_flag = TRUE;
- }
- else if (strcmp(arg, "-s")==0) {
- s_flag = TRUE;
- }
- else if (strcmp(arg, "-v")==0) {
- v_flag = TRUE;
- }
- else if (strcmp(arg, "-?")==0) {
- /* Ignore it. */
- ;
- }
- else if (in1 == NULL) {
- in1 = arg;
- }
- else if (in2 == NULL) {
- in2 = arg;
- }
- else {
- printf("Extra file argument: %s\n", arg);
- exit(BAD_EXIT);
- }
- }
-
- /* Open the input files. */
- w1.file = fopen(in1, "r");
- if (w1.file == NULL) {
- printf("Can not open %s\n", in1);
- exit(BAD_EXIT);
- }
- w2.file = fopen(in2, "r");
- if (w2.file == NULL) {
- printf("Can not open %s\n", in2);
- fclose(w2.file);
- exit(BAD_EXIT);
- }
-
- /* Initialize the windows. */
- w1.line = 1;
- w2.line = 1;
- w1.eof = FALSE;
- w2.eof = FALSE;
- w1.nlines = 0;
- w2.nlines = 0;
- w1.first = 0;
- w2.first = 0;
- w1.last = 0;
- w2.last = 0;
- for (i = 0; i < WINDOW_LINES; i++) {
- w1.index[i] = 0;
- w2.index[i] = 0;
- }
-
- /* Compare the two files and print out differences. */
- sdif();
-
- /* Close the files. */
- fclose(w1.file);
- fclose(w2.file);
-
- /* Print out statistics. */
- TRACE("dump", SL_DUMP());
-
- RETURN_VOID("main");
- }
-
- /*
- Compare two files line by line.
- Print lines that do not match.
- Assume that file 1 contains any inserted lines.
- */
- void
- sdif(void)
- {
- int i, j;
-
- TICKB("sdif");
-
- /* Fill up the window buffers. */
- for (i = 0; i < WINDOW_LINES; i++) {
- insert(&w1);
- insert(&w2);
- }
-
- loop:
- if (w1.nlines == 0 && w2.nlines == 0) {
- RETURN_VOID("sdif");
- }
- else if (w1.nlines == 0 && w2.nlines >= 10) {
- printf("\nFile %s ends before file %s\n", in1, in2);
- RETURN_VOID("sdif");
- }
- else if (w2.nlines == 0 && w1.nlines >= 10) {
- printf("\nFile %s ends before file %s\n", in2, in1);
- RETURN_VOID("sdif");
- }
-
- if(compare(0, 0)) {
- /* Lines match. */
- print_match();
- advance(&w1);
- advance(&w2);
- goto loop;
- }
-
- /* Look for some changed or inserted lines. */
- for (i = 1; i < WINDOW_LINES; i++) {
- /* 3/9/89: don't resynch on duplicated lines. */
- if (resynch(i, i) && !compare(i, i+1)) {
- if (s_flag) {
- printf("----- %d changed lines\n", i);
- }
- for (j = 0; j < i; j++) {
- print_change(j);
- }
- for (j = 0; j < i; j++) {
- advance(&w1);
- advance(&w2);
- }
- goto loop;
- }
-
- if (resynch(i, 0)) {
- if (s_flag) {
- printf("----- %d inserted lines\n", i);
- }
- for (j = 0; j < i; j++) {
- print_insert(j, -1);
- }
- for (j = 0; j < i; j++) {
- advance(&w1);
- }
- goto loop;
- }
- }
-
- /*
- Look for lines inserted in file 2.
- This can happen as a result of previous erroneous advances.
- */
- for (i = 1; i < WINDOW_LINES; i++) {
- if (resynch(0, i)) {
- if (s_flag) {
- printf("----- %d back inserted lines\n", i);
- }
- for (j = 0; j < i; j++) {
- print_insert(-1, j);
- }
- for (j = 0; j < i; j++) {
- advance(&w2);
- }
- goto loop;
- }
- }
-
- /*
- We haven't identified either a single group of insertions or
- a single group of changed lines. We have probably just seen
- a combination of changes and insertions. Just advance both
- files one line each. We'll get back in synch quickly.
- */
-
- if (s_flag) {
- printf("----- failure advance\n");
- }
- if (w1.nlines) {
- print_insert(0, -1);
- }
- else {
- print_insert(-1, 0);
- }
- advance(&w1);
- advance(&w2);
- goto loop;
- }
-
- /*
- Advance one line in the indicated window.
- This frees up space at the beginning of the window buffer.
- */
- void
- advance(w_type *wp)
- {
- int freed;
- int i;
- int lines;
-
- TRACEPB("advance", printf("(%p)\n", wp));
-
- lines = wp -> nlines;
- if (lines == 0) {
- RETURN_VOID("advance");
- }
- freed = strlen(&wp->window[wp->index[0]])+1;
-
- wp -> first += freed;
- lines--;
- for (i = 0; i < lines; i++) {
- wp -> index[i] = wp -> index[i+1];
- }
- wp -> nlines--;
- wp -> line++;
-
- /* Refill the buffer. */
- if (wp -> nlines == WINDOW_LINES-1) {
- insert(wp);
- }
-
- TICKX("advance");
- }
-
-
- /*
- Return TRUE if the indicated lines match.
- */
- bool
- compare(int n1, int n2)
- {
- char *p1, *p2;
- int i;
-
- TRACEPB("compare", printf("(%d, %d)\n", n1, n2));
-
- if (n1 >= w1.nlines || n2 >= w2.nlines) {
-
- RETURN_BOOL("compare", FALSE);
- }
- else {
- p1 = &w1.window[w1.index[n1]];
- p2 = &w2.window[w2.index[n2]];
-
- RETURN_BOOL("compare", strcmp(p1, p2) == 0);
- }
- }
-
- /*
- Fill a buffer from a file.
- Set the end of file flag if appropriate.
- */
- bool
- fill_buf(char *buffer, w_type *wp)
- {
- int c;
- int i;
-
- TRACEPB("fill_buf", printf("(%p, %p)\n", buffer, wp));
-
- if (wp -> eof) {
- RETURN_BOOL("fill_buf", FALSE);
- }
- else {
- for (i = 0;;) {
- c = fgetc(wp -> file);
- if (c == '\r') {
- continue;
- }
- if (c == EOF) {
- wp -> eof = TRUE;
-
- if (i == 0) {
- RETURN_BOOL("fill_buf", FALSE);
- }
- break;
- }
- else if (c == '\n') {
- buffer[i++] = c;
- break;
- }
- else {
- buffer[i++] = c;
- }
- }
- }
- buffer[i] = '\0';
-
- RETURN_BOOL("fill_buf", TRUE);
- }
-
- /*
- Insert a line at the end of the window.
- Pack the buffer if required.
- */
- void
- insert(w_type *wp)
- {
- char buffer [1000];
- int size, avail;
- int i, p, q;
-
- TRACEPB("insert", printf("(%p)\n", wp));
-
- if (!fill_buf(buffer, wp)) {
- RETURN_VOID("insert");
- }
- size = strlen(buffer)+1;
- avail = WINDOW_CHARS - wp -> last;
-
- if (wp -> nlines >= WINDOW_LINES) {
- printf("insert: too many lines.\n");
- RETURN_VOID("insert");
- }
-
- /* Compact buffer. */
- if (size >= avail) {
- /* Adjust indices. */
- for (i = 0; i < wp -> nlines; i++) {
- wp -> index[i] -= wp -> first;
- }
- /* Move the characters in the buffer. */
- for (p = wp -> first, q = 0; p < wp -> last; p++, q++) {
- wp -> window[q] = wp -> window[p];
- }
- /* Adjust counts. */
- wp -> last -= wp -> first;
- avail += wp -> first;
- wp -> first = 0;
- }
-
- /* Insert the buffer at the end of the window. */
- if (size < avail) {
- strcpy(&wp -> window[wp -> last], buffer);
- wp -> index[wp -> nlines] = wp -> last;
- wp -> last += size;
- wp -> nlines++;
- }
- else {
- printf("not enough room in window!!\n");
- exit(BAD_EXIT);
- }
-
- TICKX("insert");
- }
-
- /*
- Print a changed line (from file 1).
- */
- void
- print_change(int n)
- {
- int i;
- char *p, *p1, *p2;
-
- TRACEPB("print_change", printf("(%d)\n", n));
-
- p1 = &w1.window[w1.index[n]];
- p2 = &w2.window[w2.index[n]];
- p = p1;
-
- /* Do not print mismatches that involve only white space. */
- if (!b_flag && !v_flag) {
- while (*p1) {
- if (*p1 != ' ' && *p1 != '\t' && *p1 != '\n') {
- goto print;
- }
- p1++;
- }
- while (*p2) {
- if (*p2 != ' ' && *p2 != '\t' && *p2 != '\n') {
- goto print;
- }
- p2++;
- }
- RETURN_VOID("print_change");
- }
-
- print:
- if (v_flag) {
- printf("%3d %3d* %s", w1.line+n, w2.line+n, p);
- }
- else {
- printf("%3d %3d: %s", w1.line+n, w2.line+n, p);
- }
-
- TICKX("print_change");
- }
-
- /*
- Print an inserted line.
- The line comes from in1 if n1 >0, or from n2 if n2 > 0.
- */
- void
- print_insert(int n1, int n2)
- {
- char *p;
-
- TRACEPB("print_insert", printf("(%d, %d)\n", n1, n2));
-
- /* Do not print blank lines. */
- if (!b_flag && !v_flag && n1 >= 0) {
- p = &w1.window[w1.index[n1]];
- while (*p) {
- if (*p != ' ' && *p != '\t' && *p != '\n') {
- goto print;
- }
- p++;
- }
- RETURN_VOID("print_insert");
- }
- else if (!b_flag && !v_flag && n2 >= 0) {
- p = &w2.window[w2.index[n2]];
- while (*p) {
- if (*p != ' ' && *p != '\t' && *p != '\n') {
- goto print;
- }
- p++;
- }
- RETURN_VOID("print_insert");
- }
-
- print:
- if (n1 >= 0) {
- p = &w1.window[w1.index[n1]];
- printf("%3d %3s: %s", w1.line+n1, " ", p);
-
- /* -----
- if (v_flag || n1 > 0) {
- printf("%3d %3s: %s", w1.line+n1, " ", p);
- }
- else {
- printf("%3d %3d: %s", w1.line+n1, w2.line+n1, p);
- }
- ----- */
- }
- else {
- p = &w2.window[w2.index[n2]];
- printf("%3s %3d: %s", " ", w2.line+n2, p);
-
- /* -----
- if (v_flag || n2 > 0) {
- printf("%3s %3d: %s", " ", w2.line+n2, p);
- }
- else {
- printf("%3d %3d: %s", w1.line+n2, w2.line+n2, p);
- }
- ----- */
- }
-
- TICKX("print_insert");
- }
-
- /*
- Print a matched line if the -v option was given.
- */
- void
- print_match(void)
- {
- TICKB("print_match");
-
- if (v_flag) {
- printf("%3d %3d: %s",
- w1.line, w2.line, &w1.window[w1.index[0]]);
- }
-
- TICKX("print_match");
- }
-
- /*
- Return TRUE if the indicated lines match and can be used to
- resynchronize the files.
- */
- bool
- resynch(int n1, int n2)
- {
- char *p;
- int count;
-
- TRACEPB("resynch", printf("(%d, %d)\n", n1, n2));
-
- if (!compare(n1, n2)) {
- RETURN_BOOL("resynch", FALSE);
- }
-
- p = &w1.window[w1.index[n1]];
-
- /* Make sure we have a non-trivial resynch point. */
- count = 0;
- while (*p) {
- if (*p != ' ' && *p != '\t' && *p != '\n') {
- count++;
- }
- p++;
- }
-
- if ( count >= 1 ||
- (w1.eof && w2.eof && n1 == w1.nlines-1 && n2 == w2.nlines-1)
- ) {
- /* Non-trivial matched lines or match to end of file. */
- RETURN_BOOL("resynch", TRUE);
- }
- else {
- /* Trivial matched lines. Look ahead for an answer. */
- RETURN_BOOL("resynch", resynch(n1+1, n2+1));
- }
- }
-